WGCNA

last updated: 2023-11-15

Install Packages

As usual, make sure we have the right packages for this exercise

if (!require("pacman")) install.packages("pacman"); library(pacman)
## Loading required package: pacman
# let's load all of the files we were using and want to have again today
p_load("tidyverse", "knitr", "readr",
       "pander", "BiocManager", 
       "dplyr", "stringr", 
       "statmod", # required dependency, need to load manually on some macOS versions.
       "Glimma", # beautifies limma results
       "purrr", # for working with lists (beautify column names)
       "reactable") # for pretty tables.

# for today:
# p_load("WGCNA")   # WGCNA is available on CRAN
if (!require("WGCNA")) BiocManager::install("WGCNA", force=TRUE); library(WGCNA)
## Loading required package: WGCNA
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
## 
## 
## Attaching package: 'WGCNA'
## The following object is masked from 'package:stats':
## 
##     cor
# 
# # We also need these Bioconductor packages today.
p_load("edgeR")
p_load("matrixStats")
p_load("org.Sc.sgd.db")
p_load("AnnotationDbi")
p_load("cowplot")
p_load("igraph")

Description

Use limma to identify differential patterns of proteomic and transcriptomic changes in a time series heat shock experiment.

Learning Objectives

At the end of this exercise, you should be able to:

  • Recognize the steps of DE analysis of Mass Spec data
  • Identify differentially Abundant proteins with limma
  • Better understand the temporal relationship between gene expression and protein abundance

Loading in the count data file

We are downloading the expected raw counts from a Github repository using the code below. Just as in previous exercises, assign the data to the variable counts. You can change the file path if you have saved it to your computer in a different location.

# Load in raw counts for all samples
counts <- read.delim('https://github.com/clstacy/GenomicDataAnalysis_Fa23/raw/main/data/YPS606_TF_Rep1_4_ExpectedCounts.txt',
    sep = "\t",
    header = T,
    row.names = 1 # convert first column to rownames
  )

Preprocessing raw count data

# tidy raw counts to visualize distributions
tidy_counts <- counts %>%
  rownames_to_column("gene_id") %>%
  tidyr::pivot_longer(
    .,                        # The dot is the the input data, magrittr tutorial
    col = -gene_id
    ) %>%  
  separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE)

(
p <- tidy_counts %>%
    ggplot(., aes(x = replicate, y = value)) +        # x = treatment, y = RNA Seq count
    geom_violin() +                                   # violin plot, show distribution
    geom_point(alpha = 0.2) +                         # add transparent points
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90)          # Rotate treatment text
    ) +
    labs(x = "Treatment Groups", y = "RNA Seq Counts") +
    facet_grid(cols = vars(stress), rows = vars(strain), drop = TRUE, scales = "free_x") +     # Facet by condition
    # scale_y_log10() +
    ggtitle("Raw count distributions")
)

Data normalization

Let’s use edgeR to normalize the raw counts

# read into a DGElist
y <- edgeR::DGEList(counts,
                    group=as.factor(
                      # remove replicate from name
                      sub("_[^_]+$", "", colnames(counts))
                    ),
                    genes = rownames(counts)
                    )

# filter low counts
keep <- rowSums(cpm(y) > 1) >= 4
y <- y[keep,]

# normalize with cpm
normalized_counts <- cpm(y,
                         log = TRUE)
str(normalized_counts)
##  num [1:5756, 1:48] 4.59 5.07 10.95 0.18 8.03 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:5756] "YAL001C" "YAL002W" "YAL003W" "YAL004W" ...
##   ..$ : chr [1:48] "YPS606_Mock_Rep1" "YPS606_Mock_Rep2" "YPS606_Mock_Rep3" "YPS606_Mock_Rep4" ...

Now that we’ve normalized, let’s look at the distributions again

# create data 
tidy_normalized_counts <- normalized_counts %>%
  data.frame() %>%
  rownames_to_column("gene_id") %>%
  tidyr::pivot_longer(
    .,                        # The dot is the the input data, magrittr tutorial
    col = -gene_id
    ) %>%  
  separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE)

(
p_normalized <- tidy_normalized_counts %>%
    ggplot(., aes(x = replicate, y = value)) +        # x = treatment, y = RNA Seq count
    geom_violin() +                                   # violin plot, show distribution
    geom_point(alpha = 0.2) +                         # add transparent points
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90)          # Rotate treatment text
    ) +
    labs(x = "Treatment Groups", y = "normalized expression") +
    facet_grid(cols = vars(stress),rows = vars(strain), drop = TRUE, scales = "free_x") +     # Facet by condition
    # scale_y_log10() +
    ggtitle("Normalized CPM distribution")
)

Variability-based subsetting of gene list

For WGCNA, we don’t want to correlate all of the genes because (1) computational limitation and (2) genes that vary a small amount increase the number of variables in the model, making it harder to be confident in the patterns we do see. One way to account for this is by subsetting our entire gene list.

One common approach for subsetting genes is taking genes whose variance is greater than the \(n^{th}\) quantile of gene varainces. We’ll do that now:

# get the variance for each row (gene)
rv_wpn <- matrixStats::rowVars(normalized_counts)

# see a summary of the quantiles of variances
summary(rv_wpn)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.01052  0.14640  0.30679  0.78837  0.74320 20.77257
# get variance corresponding to 75th percentile
q75_wpn <- quantile( matrixStats::rowVars(normalized_counts), .75)  # common cutoff

# # get variance corresponding to 95th percentile
# q95_wpn <- quantile( matrixStats::rowVars(normalized_counts), .95)  # <= changed to 95 quantile 

# to reduce dataset, only get genes with variance greater than above
normalized_counts_variable_genes <- normalized_counts[ rv_wpn > q75_wpn, ]

str(normalized_counts_variable_genes)
##  num [1:1439, 1:48] 0.18 8.03 2.32 -2.21 5.16 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:1439] "YAL004W" "YAL005C" "YAL008W" "YAL016C-A" ...
##   ..$ : chr [1:48] "YPS606_Mock_Rep1" "YPS606_Mock_Rep2" "YPS606_Mock_Rep3" "YPS606_Mock_Rep4" ...

Notice the number of genes retained is much fewer now.

What are the distributions of counts of the normalized cpm’s of the retained genes?

tidy_normalized_counts_variable_genes <- normalized_counts_variable_genes %>%
  data.frame() %>%
  rownames_to_column("gene_id") %>%
  pivot_longer(-gene_id) %>%  
  separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE)

(
p_normalized_varGenes <- tidy_normalized_counts_variable_genes %>%
    ggplot(., aes(x = replicate, y = value)) +        # x = treatment, y = RNA Seq count
    geom_violin() +                                   # violin plot, show distribution
    geom_point(alpha = 0.2) +                         # add transparent points
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90)          # Rotate treatment text
    ) +
    labs(x = "Treatment Groups", y = "normalized expression") +
    facet_grid(cols = vars(stress),rows = vars(strain), drop = TRUE, scales = "free_x") +     # Facet by condition
    # scale_y_log10() +
    ggtitle("Normalized CPM distribution")
)

For the analyses we’ve done so far, we’ve needed the columns to correspond to samples and the rows to correspond to genes. WGCNA expects the opposite, where columns are genes and rows are samples. We can use the transpose function in r t() in order to convert our data. Let’s do that now:

# transpose our normalized count matrix and save to new object
variable_genes_matrix = t(normalized_counts_variable_genes)

# Look at first 5 rows and 10 columns
variable_genes_matrix[1:5,1:10]           
##                     YAL004W   YAL005C  YAL008W  YAL016C-A  YAL017W  YAL025C
## YPS606_Mock_Rep1  0.1800382  8.028244 2.324995 -2.2084931 5.159889 6.894753
## YPS606_Mock_Rep2 -1.4421388  7.958597 2.393871 -1.4421388 5.210439 6.863944
## YPS606_Mock_Rep3  2.7578618  8.441883 2.463258 -2.1949107 5.172265 6.610721
## YPS606_Mock_Rep4  0.6912945  8.266050 2.514840 -2.2718848 5.102592 6.520798
## YPS606_EtOH_Rep1  3.3881231 13.421082 4.596529 -0.1718948 7.498577 2.560692
##                     YAL028W  YAL036C     YAL037W  YAL053W
## YPS606_Mock_Rep1 0.57088502 7.529241  0.08734029 7.332505
## YPS606_Mock_Rep2 0.56447336 7.658894  0.49035704 7.160599
## YPS606_Mock_Rep3 0.02355968 7.491845 -0.46143877 7.299935
## YPS606_Mock_Rep4 1.23215081 7.599281 -0.52142280 7.104422
## YPS606_EtOH_Rep1 4.24326006 4.160897  1.77493580 7.000541

WGCNA thresholding

We can see now that the rows = treatments and columns = gene probes. We’re ready to start WGCNA. A correlation network would be a complete network (all genes would be connected to all other genes). Hence we need to pick a threshhold value (if correlation is below threshold, remove the edge). We assume the true biological network follows a scale-free structure (see papers by Albert Barabasi).

To do that, WGCNA will try a range of soft thresholds and create a diagnostic plot. This step can take several minutes.

# library(WGCNA)
allowWGCNAThreads(nThreads = 4)          # allow multi-threading (optional)
## Allowing multi-threading with up to 4 threads.
#> Allowing multi-threading with up to 4 threads.

# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to = 20, by = 2))

# Call the network topology analysis function
sft = pickSoftThreshold(
  variable_genes_matrix, # <= Input data
  #blockSize = 30,
  powerVector = powers,
  verbose = 5
  )
## pickSoftThreshold: will use block size 1439.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1439 of 1439
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.9360  1.8100          0.945   798.0     875.0   1020
## 2      2   0.7490  0.6570          0.816   559.0     623.0    831
## 3      3   0.4600  0.2840          0.639   428.0     472.0    710
## 4      4   0.0688  0.0803          0.524   344.0     368.0    623
## 5      5   0.0400 -0.0627          0.675   286.0     292.0    558
## 6      6   0.2630 -0.1580          0.831   243.0     238.0    507
## 7      7   0.4550 -0.2320          0.895   210.0     196.0    465
## 8      8   0.6190 -0.2910          0.914   183.0     164.0    430
## 9      9   0.7500 -0.3350          0.947   162.0     137.0    400
## 10    10   0.7980 -0.3840          0.958   145.0     117.0    374
## 11    12   0.8780 -0.4610          0.950   117.0      87.8    331
## 12    14   0.9080 -0.5160          0.949    97.5      67.8    296
## 13    16   0.9270 -0.5630          0.941    82.5      52.7    268
## 14    18   0.9580 -0.6080          0.964    70.7      42.5    245
## 15    20   0.9460 -0.6450          0.942    61.4      34.6    225

Let’s visualize those results

par(mfrow = c(1,2));
cex1 = 0.9;

plot(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     xlab = "Soft Threshold (power)",
     ylab = "Scale Free Topology Model Fit, signed R^2",
     main = paste("Scale independence")
)
text(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     labels = powers, cex = cex1, col = "red"
)
abline(h = 0.90, col = "red")
plot(sft$fitIndices[, 1],
     sft$fitIndices[, 5],
     xlab = "Soft Threshold (power)",
     ylab = "Mean Connectivity",
     type = "n",
     main = paste("Mean connectivity")
)
text(sft$fitIndices[, 1],
     sft$fitIndices[, 5],
     labels = powers,
     cex = cex1, col = "red")

Pick a soft threshold power near the curve of the plot. We’ll pick 12, but feel free to experiment with other powers to see how it affects your results. Now we can create the network using the blockwiseModules command. The blockwiseModule may take a while to run, since it is constructing the TOM (topological overlap matrix) and several other steps. While it runs, take a look at the blockwiseModule documentation (link to [vignette][https://www.rdocumentation.org/packages/WGCNA/versions/1.69/topics/blockwiseModules]) for more information on the parameters. How might you change the parameters to get more or less modules?

Generate network

# NOTE: Be sure to run this entire chunk at once
picked_power = 12
cor <- WGCNA::cor         # Force it to use WGCNA cor function (fix a namespace conflict issue)
variable_genes_network <- blockwiseModules(variable_genes_matrix,                # <= input here

                          # == Adjacency Function ==
                          power = picked_power,                # <= power here
                          networkType = "signed",

                          # == Tree and Block Options ==
                          deepSplit = 2,
                          pamRespectsDendro = F,
                          # detectCutHeight = 0.75,
                          minModuleSize = 30,
                          maxBlockSize = 4000,

                          # == Module Adjustments ==
                          reassignThreshold = 0,
                          mergeCutHeight = 0.25,

                          # == TOM == Archive the run results in TOM file (saves time)
                          saveTOMs = T,
                          saveTOMFileBase = "ER",

                          # == Output Options
                          numericLabels = F,
                          verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will use 4 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file ER-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 20 genes from module 1 because their KME is too low.
##      ..removing 10 genes from module 2 because their KME is too low.
##      ..removing 8 genes from module 3 because their KME is too low.
##      ..removing 2 genes from module 4 because their KME is too low.
##      ..removing 2 genes from module 5 because their KME is too low.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...
# return cor() to be the usual command
cor <- stats::cor

Let’s visualize a dendrogram of those modules, each terminal branch corresponds to a single gene.

# Convert labels to colors for plotting
mergedColors_var_genes = labels2colors(variable_genes_network$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(
  variable_genes_network$dendrograms[[1]],
  mergedColors_var_genes[variable_genes_network$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05 )

Now let’s extract the genes and their coresponding module/color to a data frame

modules_variable_genes_df <- data.frame(
  gene_id = names(variable_genes_network$colors),
  colors = labels2colors(variable_genes_network$colors)
)

head(modules_variable_genes_df)
##     gene_id    colors
## 1   YAL004W       red
## 2   YAL005C       red
## 3   YAL008W       red
## 4 YAL016C-A     black
## 5   YAL017W       red
## 6   YAL025C turquoise
# # pick out a few modules of interest here
# modules_of_interest = c("turquoise", "blue", "brown", "grey")
# 
# # Pull out list of genes in that module
# subset_modules_variable_genes_df = modules_variable_genes_df %>%
#   subset(colors %in% modules_of_interest)
# 
# row.names(modules_variable_genes_df) = modules_variable_genes_df$gene_id
# 
# # Get normalized expression for those genes
# subexpr = normalized_counts_variable_genes[subset_modules_variable_genes_df$gene_id,]
# 
# # convert to a data frame
# subset_modules_variable_genes_df = data.frame(subexpr) %>%
#   mutate(
#     gene_id = row.names(.)
#   ) %>%
#   pivot_longer(-gene_id) %>%
#   mutate(
#     module = modules_variable_genes_df[gene_id,]$colors
#   )
# 
# subset_modules_variable_genes_df %>% 
#   ggplot(., aes(x=name, y=1+value, group=gene_id)) +
#   geom_line(aes(color = module),
#             alpha = 0.2) +
#   theme_bw() +
#   theme(
#     axis.text.x = element_text(angle = 90)
#   ) +
#   facet_grid(rows = vars(module), 
#              # scales = "free_y"
#              ) +
#   labs(x = "treatment",
#        y = "normalized expression") +
#   scale_color_identity() #+ 
#   # scale_y_log10()

Visualize Modules

Plot gene expression correlations in a grid, based on genotype and module membership

(
  var_genes_plot <- normalized_counts_variable_genes %>%
  data.frame() %>%
    mutate(
        gene_id = row.names(.)
    ) %>%
    pivot_longer(-gene_id) %>%
    mutate(
        module = variable_genes_network$colors[gene_id]
    ) %>% 
  separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE) %>%
  ggplot(., aes(x=name, y=value, group=gene_id)) +
    geom_line(aes(color = "grey"), size=1,
            alpha = 0.2) +
    geom_line(aes(color = module),
            alpha = 0.2) +
    # geom_boxplot(width=0.5,
    #              aes(group=name, color = module),
    #              outlier.shape = NA,
    #              alpha=0.9) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90)) +
    facet_grid(rows = vars(module), 
             cols= vars(strain),
             scales = "free"
             ) +
    scale_x_discrete(position = "top") +
    labs(x = "treatment",
       y = "normalized expression") +
    # use color variable as the color
    scale_color_identity()
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Heatmap correlating modules

Now, let’s generate a heatmap of the relationship between the modules

# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(variable_genes_matrix, mergedColors_var_genes)$eigengenes

# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)

# Add treatment names
MEs0$treatment = row.names(MEs0)

# tidy & plot data
mME = MEs0 %>%
  pivot_longer(-treatment) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order)
  )

mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr")

Generate & Visualize Networks

We can generate a network graph file for analysis in Cytoscape or as an edge/vertices file.

i think this code chunk can be removed

# # we could subset to only certain modules, if we want to.
# genes_of_interest = modules_variable_genes_df #%>%
#   # subset(colors %in% modules_of_interest)
# 
# # if we subset, this would only get genes in selected modules
# expr_of_interest = normalized_counts_variable_genes[genes_of_interest$gene_id,]
# 
# # Only recalculate TOM for modules of interest (faster, altho there's some online discussion if this will be slightly off)
# TOM = TOMsimilarityFromExpr(t(expr_of_interest),
#                             power = picked_power)
# 
# # Add gene names to row and columns
# row.names(TOM) = row.names(expr_of_interest)
# colnames(TOM) = row.names(expr_of_interest)
# 
# edge_list_var_genes = data.frame(TOM) %>%
#   mutate(
#     gene1 = row.names(.)
#   ) %>%
#   pivot_longer(-gene1) %>%
#   dplyr::rename(gene2 = name, correlation = value) %>%
#   unique() %>%
#   subset(!(gene1==gene2))
# 
# head(edge_list_var_genes)
# correlation cutoff
corr_cutoff = 0.3

# first, create adjacency matrix
adjacency = adjacency(variable_genes_matrix, power=picked_power, type="signed")

# Calculate topological overlap matrix and dissimilarity from adjacency matrix
TOM = TOMsimilarity(adjacency, TOMType="signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Add gene names to row and columns
rownames(TOM) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency),columns="GENENAME") %>% mutate(label = case_when(
  is.na(GENENAME) ~ ORF,
  TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
colnames(TOM) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency),columns="GENENAME") %>% mutate(label = case_when(
  is.na(GENENAME) ~ ORF,
  TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
# assign TOM to new variable
adj <- TOM

# only show correlations greater than corr_cutoff
adj[adj > corr_cutoff] = 1

# set all correlations below that threshold to 0
adj[adj != 1] = 0

# graph adjacency network
network_graph_data <- graph.adjacency(adj)

# remove redundant self-loops in the network
network_graph_data <- simplify(network_graph_data)

# add colors for the network graph from constructed network modules
V(network_graph_data)$color <- variable_genes_network$colors

# set plotting margins
par(mar=c(0,0,0.5,0))
# remove unconnected nodes
network_graph_data <- delete.vertices(network_graph_data,
                                      degree(network_graph_data)==0)


plot(network_graph_data, 
     layout=layout.fruchterman.reingold(network_graph_data, niter=1000),
     vertex.size=3,edge.arrow.size=0.1, 
     vertex.label.cex = 0.01,  # increase this value to see gene names
     vertex.label.color= "black", rescale=TRUE, ylim=c(-1,1),xlim=c(-1,1),
     main = "Network plot for most variable genes")

Let’s add GO enrichments to the module pattern graph

GO enrichments

# create list of genes in each one
variable_geneIDs_module_list <- normalized_counts_variable_genes %>%
  data.frame() %>%
    mutate(
        gene_id = row.names(.)
    ) %>%
    pivot_longer(-gene_id) %>%
    mutate(
        module = variable_genes_network$colors[gene_id]
    ) %>%
  group_by(module) %>%
  # only get genes that are DE
  # filter(DE_direction != 0) %>%
  group_split() %>% # split into many lists
  purrr::set_names(purrr::map_chr(., ~.x$module[1])) %>% # assign names
  # get just the ORF names
  map(., ~ .x |> pull(all_of("gene_id"))) %>%
  # remove duplicates
  map(., ~ unique(.))
  

# run enrichment for each module
GO_enrich_variable_gene_modules <- clusterProfiler::compareCluster(
                                  geneCluster   = variable_geneIDs_module_list, 
                                  ont           = "BP",
                                  OrgDb         = org.Sc.sgd.db,
                                  pAdjustMethod = "BH",
                                  pvalueCutoff  = 0.5, # high to get for all modules
                                  qvalueCutoff  = 0.5,
                                  readable      = FALSE,
                                  fun           = 'enrichGO',
                                  keyType       = 'ORF'
                                  )  |>
  # let's add a 'richFactor' column that gives us the proportion of genes DE in the term
  mutate(richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))


# plot those enrichments
GO_enrich_variable_gene_modules %>%
  as.data.frame() %>% 
  group_by(Cluster) %>%
  slice_min(order_by=pvalue, n = 5) %>%
  ggplot(.,
           aes(Cluster, fct_reorder(Description, richFactor))) +
      geom_point(aes(color = p.adjust, size = Count)) +
      scale_color_gradientn(
        colours = c("#f7ca64", "#46bac2", "#7e62a3"),
        trans = "log10",
        guide = guide_colorbar(reverse = TRUE, order = 1)
      ) +
      scale_size_continuous(range = c(2, 12)) +
      scale_x_discrete(position = "top") +
      scale_y_discrete(labels = function(x) str_wrap(x, width = 25)) +
      xlab("Module") +
      ylab(NULL) +
      ggtitle("Enriched GO Categories by Module") +
      # facet_wrap(~DE, scales = "free_y") +
      theme_bw() +
      theme(text = element_text(size=14),
            axis.text.x = element_text(angle = 90,hjust=0) )

# here are the data to add to cowplot:
GO_summaries_variable_genes <- GO_enrich_variable_gene_modules %>%
  as.data.frame() %>% 
  group_by(Cluster) %>%
  slice_min(order_by=p.adjust, n = 5) %>%
  slice_head(n = 5) %>%
  mutate(Description = case_when(
    p.adjust < 0.05 ~ paste0(Description," (",Count,")"),
    TRUE ~ " "
  )) %>%
  summarize(description = str_c(Description, collapse = "\n")) %>%
  pull(description)


# draw plot with GO terms
cowplot::ggdraw(var_genes_plot +
                  theme(plot.margin = margin(t = 0.5, r = 5, b = 0.5, l = 0.1, unit = "cm"))) +
  cowplot::draw_text(GO_summaries_variable_genes, 
                     x = 0.76,  # Adjust this value for the desired horizontal position
                     y = rev(seq(0.1, 0.82, length.out = length(GO_summaries_variable_genes))),  # Adjust this sequence for the desired vertical positions
                     size = 6, 
                     hjust = 0,  # Align to the left
                     vjust = 0.5)

Create a network using genes identified as differentially expressed

Our network so far didn’t use any prior information about genes to include or exclude, just those that are most variable. We do have information about the genes that are considered differentially expressed in their corresponding genotype and environment. What does the network look like if we build it based on those genes that have been identified as differentially expressed?

# set FDR threshold for being considered a DE gene, 
# this is applied to the data we are loading in here.
FDR_cutoff <- 0.01

# Load in DE gene summary
DE_genes <- readr::read_delim("https://github.com/clstacy/GenomicDataAnalysis_Fa23/raw/main/data/DE_yeast_TF_stress.txt.gz",
    delim = "\t", 
    escape_double = FALSE, 
    name_repair = "universal",
    show_col_types=FALSE,
    trim_ws = TRUE) %>%
  dplyr::select(ID, contains("FDR")) %>%
  dplyr::select(ID, contains(".v.")) %>%
  pivot_longer(-ID, names_to = "contrast", values_to = "FDR") %>%
  # only get genes with FDR < 0.01
  filter(FDR < FDR_cutoff) %>%
  pull(ID) %>%
  unique()
## New names:
## • `logFC: WT NaCl response` -> `logFC..WT.NaCl.response`
## • `FDR: WT NaCl response` -> `FDR..WT.NaCl.response`
## • `logFC: msn24 mutant NaCl response` -> `logFC..msn24.mutant.NaCl.response`
## • `FDR: msn24 mutant NaCl response` -> `FDR..msn24.mutant.NaCl.response`
## • `logFC: yap1 mutant NaCl response` -> `logFC..yap1.mutant.NaCl.response`
## • `FDR: yap1 mutant NaCl response` -> `FDR..yap1.mutant.NaCl.response`
## • `logFC: skn7 mutant NaCl response` -> `logFC..skn7.mutant.NaCl.response`
## • `FDR: skn7 mutant NaCl response` -> `FDR..skn7.mutant.NaCl.response`
## • `logFC: WT EtOH response` -> `logFC..WT.EtOH.response`
## • `FDR: WT EtOH response` -> `FDR..WT.EtOH.response`
## • `logFC: msn24 mutant EtOH response` -> `logFC...msn24.mutant.EtOH.response`
## • `FDR: msn24 mutan EtOH response` -> `FDR...msn24.mutan..EtOH.response`
## • `logFC: yap1 mutant EtOH response` -> `logFC...yap1.mutant.EtOH.response`
## • `FDR: yap1 mutant EtOH response` -> `FDR...yap1.mutant.EtOH.response`
## • `logFC: skn7 mutant EtOH response` -> `logFC...skn7.mutant.EtOH.response`
## • `FDR: skn7 mutant EtOH response` -> `FDR...skn7.mutant.EtOH.response`
## • `logFC: WT v msn24 mutant: NaCl response` ->
##   `logFC..WT.v.msn24.mutant..NaCl.response`
## • `FDR: WT v msn24 mutant: NaCl response` ->
##   `FDR..WT.v.msn24.mutant..NaCl.response`
## • `logFC: WT v yap1 mutant: NaCl response` ->
##   `logFC..WT.v.yap1.mutant..NaCl.response`
## • `FDR: WT v yap1 mutant: NaCl response` ->
##   `FDR..WT.v.yap1.mutant..NaCl.response`
## • `logFC: WT v skn7 mutant: NaCl response` ->
##   `logFC..WT.v.skn7.mutant..NaCl.response`
## • `FDR: WT v skn7 mutant NaCl response` ->
##   `FDR..WT.v.skn7.mutant.NaCl.response`
## • `logFC: WT v msn24 mutant: EtOH response` ->
##   `logFC..WT.v.msn24.mutant..EtOH.response`
## • `FDR: WT v msn24 mutant: EtOH response` ->
##   `FDR..WT.v.msn24.mutant..EtOH.response`
## • `logFC: WT v yap1 mutant: EtOH response` ->
##   `logFC..WT.v.yap1.mutant..EtOH.response`
## • `FDR: WT v yap1 mutant: EtOH response` ->
##   `FDR..WT.v.yap1.mutant..EtOH.response`
## • `logFC: WT v skn7 mutant: EtOH response` ->
##   `logFC..WT.v.skn7.mutant..EtOH.response`
## • `FDR: WT v skn7 mutant: EtOH response` ->
##   `FDR..WT.v.skn7.mutant..EtOH.response`
# subset the counts to only be counts for genes that are DE
counts_DE <- counts %>%
  rownames_to_column("ORF") %>%
  dplyr::filter(ORF %in% DE_genes) %>%
  column_to_rownames("ORF")

# normalize with cpm, filter to just DE genes
normalized_counts_DE_genes <- cpm(y,
                         log = TRUE) %>%
  data.frame() %>%
  rownames_to_column("ORF") %>%
  dplyr::filter(ORF %in% DE_genes) %>%
  column_to_rownames("ORF") %>%
  as.matrix()


DE_genes_matrix = t(normalized_counts_DE_genes)

What is the correct power for this analysis?

Find ideal power

# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to = 20, by = 2))

# Call the network topology analysis function
sft = pickSoftThreshold(
  DE_genes_matrix,             # <= Input data
  #blockSize = 30,
  powerVector = powers,
  verbose = 5
  )
## pickSoftThreshold: will use block size 1972.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1972 of 1972
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.9580  2.2300          0.949  1010.0    1090.0   1320
## 2      2   0.9110  0.7890          0.886   671.0     724.0   1040
## 3      3   0.7050  0.2980          0.636   493.0     521.0    868
## 4      4   0.0154  0.0234         -0.155   383.0     390.0    746
## 5      5   0.3260 -0.1420          0.182   308.0     302.0    654
## 6      6   0.6560 -0.2610          0.618   255.0     240.0    581
## 7      7   0.7660 -0.3380          0.729   214.0     193.0    521
## 8      8   0.8500 -0.4020          0.833   183.0     157.0    471
## 9      9   0.9010 -0.4510          0.897   159.0     129.0    429
## 10    10   0.9290 -0.4860          0.928   139.0     108.0    393
## 11    12   0.9520 -0.5480          0.960   109.0      76.6    333
## 12    14   0.9670 -0.6100          0.967    88.1      56.1    290
## 13    16   0.9690 -0.6660          0.964    72.6      42.3    258
## 14    18   0.9670 -0.7070          0.958    60.8      32.5    231
## 15    20   0.9700 -0.7490          0.962    51.6      25.7    210
par(mfrow = c(1,2));
cex1 = 0.9;

plot(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     xlab = "Soft Threshold (power)",
     ylab = "Scale Free Topology Model Fit, signed R^2",
     main = paste("Scale independence")
)
text(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     labels = powers, cex = cex1, col = "red"
)
abline(h = 0.90, col = "red")
plot(sft$fitIndices[, 1],
     sft$fitIndices[, 5],
     xlab = "Soft Threshold (power)",
     ylab = "Mean Connectivity",
     type = "n",
     main = paste("Mean connectivity")
)
text(sft$fitIndices[, 1],
     sft$fitIndices[, 5],
     labels = powers,
     cex = cex1, col = "red")

It looks like a higher power would better fit this data. Let’s try with 12. You can try changing to a different threshold and see how the results change.

picked_power = 12
cor <- WGCNA::cor         # Force it to use WGCNA cor function (fix a namespace conflict issue)
DE_genes_network <- blockwiseModules(DE_genes_matrix,                # <= input here

                          # == Adjacency Function ==
                          power = picked_power,                # <= power here
                          networkType = "signed",

                          # == Tree and Block Options ==
                          deepSplit = 2,
                          pamRespectsDendro = F,
                          # detectCutHeight = 0.75,
                          minModuleSize = 30,
                          maxBlockSize = 4000,

                          # == Module Adjustments ==
                          reassignThreshold = 0,
                          mergeCutHeight = 0.25,

                          # == TOM == Archive the run results in TOM file (saves time)
                          saveTOMs = T,
                          saveTOMFileBase = "ER",

                          # == Output Options
                          numericLabels = F,
                          verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will use 4 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file ER-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 10 genes from module 1 because their KME is too low.
##      ..removing 5 genes from module 2 because their KME is too low.
##      ..removing 4 genes from module 3 because their KME is too low.
##      ..removing 3 genes from module 4 because their KME is too low.
##      ..removing 3 genes from module 5 because their KME is too low.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...
# fix namespace issue with cor to return to desired function
cor <- stats::cor    
# Convert labels to colors for plotting
mergedColors_DE = labels2colors(DE_genes_network$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(
  DE_genes_network$dendrograms[[1]],
  mergedColors_DE[DE_genes_network$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05 )

Now let’s extract the genes and their corresponding module/color to a data frame

modules_DE_genes_df <- data.frame(
  gene_id = names(variable_genes_network$colors),
  colors = labels2colors(variable_genes_network$colors)
)

head(modules_DE_genes_df)
##     gene_id    colors
## 1   YAL004W       red
## 2   YAL005C       red
## 3   YAL008W       red
## 4 YAL016C-A     black
## 5   YAL017W       red
## 6   YAL025C turquoise

Visualize cluster patterns

(
  DE_genes_plot <- normalized_counts_DE_genes %>%
  data.frame() %>%
    mutate(
        gene_id = row.names(.)
    ) %>%
    pivot_longer(-gene_id) %>%
    mutate(
        module = DE_genes_network$colors[gene_id]
    ) %>% 
  separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE) %>%
  ggplot(., aes(x=name, y=value+1, group=gene_id)) +
    geom_line(aes(color = "grey"), size=1,
            alpha = 0.2) +
    geom_line(aes(color = module),
            alpha = 0.2) +
    # geom_boxplot(width=0.5,
    #              aes(group=name, color = module),
    #              outlier.shape = NA,
    #              alpha=0.9) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90)) +
    facet_grid(rows = vars(module), 
             cols= vars(strain),
             scales = "free"
             ) +
    scale_x_discrete(position = "top") +
    labs(x = "treatment",
       y = "normalized expression") +
    # use color variable as the color
    scale_color_identity()
)

Generate a heatmap of the eigengenes

# Get Module Eigengenes per cluster
DE_eigengenes <- moduleEigengenes(DE_genes_matrix, mergedColors_DE)$eigengenes

# Reorder modules so similar modules are next to each other
DE_eigengenes <- orderMEs(DE_eigengenes)

# get order of modules without extra naming
module_order = names(DE_eigengenes) %>% gsub("ME","", .)

# Add treatment names
DE_eigengenes$treatment = row.names(DE_eigengenes)

# tidy & plot data
tidy_DE_eigengenes <- DE_eigengenes %>%
  pivot_longer(-treatment) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order)
  )

tidy_DE_eigengenes %>% 
  ggplot(., aes(x=treatment, y=name, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr")

Generate and Export Networks

The network file can be generated for Cytoscape or as an edge/vertices file.

genes_of_interest_DE = modules_DE_genes_df #%>%
  #subset(colors %in% modules_of_interest)

expr_of_interest_DE = normalized_counts_variable_genes#[genes_of_interest$gene_id,]


# Only recalculate TOM for modules of interest (faster, altho there's some online discussion if this will be slightly off)
TOM_DE = TOMsimilarityFromExpr(t(expr_of_interest_DE),
                            power = picked_power)
## TOM calculation: adjacency..
## ..will use 4 parallel threads.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Add gene names to row and columns
row.names(TOM_DE) = row.names(expr_of_interest_DE)
colnames(TOM_DE) = row.names(expr_of_interest_DE)

edge_list_DE = data.frame(TOM_DE) %>%
  mutate(
    gene1 = row.names(.)
  ) %>%
  pivot_longer(-gene1) %>%
  dplyr::rename(gene2 = name, correlation = value) %>%
  unique() %>%
  subset(!(gene1==gene2))

head(edge_list_DE)
## # A tibble: 6 Ă— 3
##   gene1   gene2     correlation
##   <chr>   <chr>           <dbl>
## 1 YAL004W YAL005C    0.182     
## 2 YAL004W YAL008W    0.0689    
## 3 YAL004W YAL016C.A  0.00000693
## 4 YAL004W YAL017W    0.162     
## 5 YAL004W YAL025C    0.153     
## 6 YAL004W YAL028W    0.0835

What does the distribution of correlations look like?

edge_list_DE %>%
  ggplot(aes(x=correlation)) +
  geom_histogram(bins=60) +
  theme_bw()

Let’s create the network based on these DE genes

# correlation cutoff
corr_cutoff = 0.3

# first, create adjacency matrix
adjacency_DE = adjacency(DE_genes_matrix, power=picked_power, type="signed")

# Calculate topological overlap matrix and dissimilarity from adjacency matrix
TOM_DE = TOMsimilarity(adjacency_DE, TOMType="signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Add gene names to row and columns
rownames(TOM_DE) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency_DE),columns="GENENAME") %>% mutate(label = case_when(
  is.na(GENENAME) ~ ORF,
  TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
colnames(TOM_DE) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency_DE),columns="GENENAME") %>% mutate(label = case_when(
  is.na(GENENAME) ~ ORF,
  TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
# assign TOM to new variable
adj_DE <- TOM_DE

# only show correlations greater than corr_cutoff
adj_DE[adj_DE > corr_cutoff] = 1

# set all correlations below that threshold to 0
adj_DE[adj_DE != 1] = 0

# graph adjacency network
network_graph_data_DE <- graph.adjacency(adj_DE)

# remove redundant self-loops in the network
network_graph_data_DE <- simplify(network_graph_data_DE)

# add colors for the network graph from constructed network modules
V(network_graph_data_DE)$color <- DE_genes_network$colors


# remove unconnected nodes
network_graph_data_DE <- delete.vertices(network_graph_data_DE,
                                      degree(network_graph_data_DE)==0)

# set plotting margins
par(mar=c(0.5,0.5,0.5,0.5))

# generate plot
plot(network_graph_data_DE, 
     layout=layout.fruchterman.reingold(network_graph_data_DE, niter=1000),
     vertex.size=3, edge.arrow.size=0.1, 
     vertex.label.cex = 0.01,  # increase this value to see gene names
     vertex.label.color= "black", rescale=TRUE, ylim=c(-1,1),xlim=c(-1,1),
     main = "Network plot for differentially expressed genes")

GO enrichments

Let’s do GO enrichments on these terms as well.

# create list of genes in each one
DE_geneIDs_module_list <- normalized_counts_DE_genes %>%
  data.frame() %>%
    mutate(
        gene_id = row.names(.)
    ) %>%
    pivot_longer(-gene_id) %>%
    mutate(
        module = DE_genes_network$colors[gene_id]
    ) %>%
  group_by(module) %>%
  # only get genes that are DE
  # filter(DE_direction != 0) %>%
  group_split() %>% # split into many lists
  purrr::set_names(purrr::map_chr(., ~.x$module[1])) %>% # assign names
  # get just the ORF names
  map(., ~ .x |> pull(all_of("gene_id"))) %>%
  # remove duplicates
  map(., ~ unique(.))
  


GO_enrich_DE_gene_modules <- clusterProfiler::compareCluster(
                                  geneCluster   = DE_geneIDs_module_list, 
                                  ont           = "BP",
                                  OrgDb         = org.Sc.sgd.db,
                                  pAdjustMethod = "BH",
                                  pvalueCutoff  = 0.5, 
                                  qvalueCutoff  = 0.5,
                                  readable      = FALSE,
                                  fun           = 'enrichGO',
                                  keyType       = 'ORF'
                                  )  |>
  # let's add a 'richFactor' column that gives us the proportion of genes DE in the term
  mutate(richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))


# get rid of sticks to save space
GO_enrich_DE_gene_modules %>%
  as.data.frame() %>% 
  group_by(Cluster) %>%
  slice_min(order_by=pvalue, n = 5) %>%
  ggplot(.,
           aes(Cluster, fct_reorder(Description, richFactor))) +
      geom_point(aes(color = p.adjust, size = Count)) +
      scale_color_gradientn(
        colours = c("#f7ca64", "#46bac2", "#7e62a3"),
        trans = "log10",
        guide = guide_colorbar(reverse = TRUE, order = 1)
      ) +
      scale_size_continuous(range = c(2, 12)) +
      scale_x_discrete(position = "top") +
      scale_y_discrete(labels = function(x) str_wrap(x, width = 25)) +
      xlab("Module") +
      ylab(NULL) +
      ggtitle("Enriched GO Categories by Module") +
      # facet_wrap(~DE, scales = "free_y") +
      theme_bw() +
      theme(text = element_text(size=14),
            axis.text.x = element_text(angle = 90,hjust=0) )

# here are the data to add to cowplot:
GO_summaries_DE_genes <- GO_enrich_DE_gene_modules %>%
  as.data.frame() %>% 
  group_by(Cluster) %>%
  slice_min(order_by=p.adjust, n = 5) %>%
  slice_head(n = 5) %>%
  mutate(Description = case_when(
    p.adjust < 0.05 ~ paste0(Description," (",Count,")"),
    TRUE ~ " "
  )) %>%
  summarize(description = str_c(Description, collapse = "\n")) %>%
  pull(description)


# draw plot with GO terms
cowplot::ggdraw(DE_genes_plot +
                  theme(plot.margin = margin(t = 0.5, r = 5, b = 0.5, l = 0.1, unit = "cm"))) +
  cowplot::draw_text(GO_summaries_DE_genes, 
                     x = 0.76,  # Adjust this value for the desired horizontal position
                     y = rev(seq(0.05, 0.82, length.out = length(GO_summaries_DE_genes))),  # Adjust this sequence for the desired vertical positions
                     size = 6, 
                     hjust = 0,  # Align GO terms to the left
                     vjust = 0.5)

Saving Graphs for Cytoscape

Questions

Question 1:

Be sure to knit this file into a pdf or html file once you’re finished.

System information for reproducibility:

pander::pander(sessionInfo())

R version 4.3.1 (2023-06-16)

Platform: aarch64-apple-darwin20 (64-bit)

locale: en_US.UTF-8||en_US.UTF-8||en_US.UTF-8||C||en_US.UTF-8||en_US.UTF-8

attached base packages: stats4, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: igraph(v.1.5.1), cowplot(v.1.1.1), org.Sc.sgd.db(v.3.18.0), AnnotationDbi(v.1.64.1), IRanges(v.2.36.0), S4Vectors(v.0.40.1), Biobase(v.2.62.0), BiocGenerics(v.0.48.1), matrixStats(v.1.0.0), edgeR(v.4.0.1), limma(v.3.58.1), WGCNA(v.1.72-1), fastcluster(v.1.2.3), dynamicTreeCut(v.1.63-1), reactable(v.0.4.4), Glimma(v.2.12.0), statmod(v.1.5.0), BiocManager(v.1.30.22), pander(v.0.6.5), knitr(v.1.45), lubridate(v.1.9.3), forcats(v.1.0.0), stringr(v.1.5.0), dplyr(v.1.1.3), purrr(v.1.0.2), readr(v.2.1.4), tidyr(v.1.3.0), tibble(v.3.2.1), ggplot2(v.3.4.4), tidyverse(v.2.0.0) and pacman(v.0.5.1)

loaded via a namespace (and not attached): splines(v.4.3.1), later(v.1.3.1), ggplotify(v.0.1.2), bitops(v.1.0-7), filelock(v.1.0.2), polyclip(v.1.10-6), preprocessCore(v.1.64.0), rpart(v.4.1.21), lifecycle(v.1.0.3), doParallel(v.1.0.17), lattice(v.0.22-5), vroom(v.1.6.4), MASS(v.7.3-60), backports(v.1.4.1), magrittr(v.2.0.3), Hmisc(v.5.1-1), sass(v.0.4.7), rmarkdown(v.2.25), jquerylib(v.0.1.4), yaml(v.2.3.7), httpuv(v.1.6.12), RColorBrewer(v.1.1-3), DBI(v.1.1.3), abind(v.1.4-5), zlibbioc(v.1.48.0), GenomicRanges(v.1.54.1), ggraph(v.2.1.0), RCurl(v.1.98-1.13), yulab.utils(v.0.1.0), nnet(v.7.3-19), tweenr(v.2.0.2), rappdirs(v.0.3.3), GenomeInfoDbData(v.1.2.11), enrichplot(v.1.22.0), ggrepel(v.0.9.4), tidytree(v.0.4.5), codetools(v.0.2-19), DelayedArray(v.0.28.0), ggforce(v.0.4.1), DOSE(v.3.28.0), tidyselect(v.1.2.0), aplot(v.0.2.2), farver(v.2.1.1), viridis(v.0.6.4), BiocFileCache(v.2.10.1), base64enc(v.0.1-3), jsonlite(v.1.8.7), ellipsis(v.0.3.2), tidygraph(v.1.2.3), Formula(v.1.2-5), survival(v.3.5-7), iterators(v.1.0.14), foreach(v.1.5.2), tools(v.4.3.1), treeio(v.1.26.0), HPO.db(v.0.99.2), Rcpp(v.1.0.11), glue(v.1.6.2), gridExtra(v.2.3), SparseArray(v.1.2.2), xfun(v.0.41), DESeq2(v.1.42.0), qvalue(v.2.34.0), MatrixGenerics(v.1.14.0), GenomeInfoDb(v.1.38.0), withr(v.2.5.2), fastmap(v.1.1.1), fansi(v.1.0.5), digest(v.0.6.33), gridGraphics(v.0.5-1), timechange(v.0.2.0), R6(v.2.5.1), mime(v.0.12), colorspace(v.2.1-0), GO.db(v.3.18.0), RSQLite(v.2.3.2), utf8(v.1.2.4), generics(v.0.1.3), data.table(v.1.14.8), graphlayouts(v.1.0.1), httr(v.1.4.7), htmlwidgets(v.1.6.2), S4Arrays(v.1.2.0), scatterpie(v.0.2.1), pkgconfig(v.2.0.3), gtable(v.0.3.4), blob(v.1.2.4), impute(v.1.76.0), XVector(v.0.42.0), shadowtext(v.0.1.2), clusterProfiler(v.4.10.0), htmltools(v.0.5.7), fgsea(v.1.28.0), scales(v.1.2.1), png(v.0.1-8), ggfun(v.0.1.3), rstudioapi(v.0.15.0), tzdb(v.0.4.0), reshape2(v.1.4.4), nlme(v.3.1-163), checkmate(v.2.3.0), curl(v.5.1.0), cachem(v.1.0.8), BiocVersion(v.3.18.0), parallel(v.4.3.1), HDO.db(v.0.99.1), foreign(v.0.8-85), pillar(v.1.9.0), grid(v.4.3.1), vctrs(v.0.6.4), promises(v.1.2.1), dbplyr(v.2.4.0), xtable(v.1.8-4), cluster(v.2.1.4), htmlTable(v.2.4.2), evaluate(v.0.23), cli(v.3.6.1), locfit(v.1.5-9.8), compiler(v.4.3.1), rlang(v.1.1.1), crayon(v.1.5.2), labeling(v.0.4.3), fs(v.1.6.3), plyr(v.1.8.9), stringi(v.1.7.12), viridisLite(v.0.4.2), BiocParallel(v.1.36.0), MPO.db(v.0.99.7), munsell(v.0.5.0), Biostrings(v.2.70.1), lazyeval(v.0.2.2), GOSemSim(v.2.28.0), Matrix(v.1.6-1.1), patchwork(v.1.1.3), hms(v.1.1.3), bit64(v.4.0.5), KEGGREST(v.1.42.0), shiny(v.1.7.5.1), SummarizedExperiment(v.1.32.0), interactiveDisplayBase(v.1.40.0), highr(v.0.10), AnnotationHub(v.3.10.0), memoise(v.2.0.1), bslib(v.0.5.1), ggtree(v.3.10.0), fastmatch(v.1.1-4), bit(v.4.0.5), gson(v.0.1.0) and ape(v.5.7-1)